Libraries

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  3.0.0     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
library(wordbankr)
library(here)
## here() starts at /cloud/project
library(RColorBrewer)
library(wesanderson)
library(ggthemes)
library(beyonce)
## Registered S3 method overwritten by 'beyonce':
##   method        from       
##   print.palette wesanderson
library(viridis)
## Loading required package: viridisLite
library(forcats)
library(colorblindr)
## Loading required package: colorspace
library(ggrepel)

Reading the data

# Read the animal-sound summary CSV from the project's data/ directory.
sounds <- here::here("data", "animal_sounds_summary.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
##   age = col_double(),
##   sound = col_character(),
##   kids_produce = col_double(),
##   kids_understand = col_double(),
##   kids_respond = col_double(),
##   prop_produce = col_double(),
##   prop_understand = col_double()
## )

Know your data! (Challenge #1)

# Structural overview: row/column counts, column types, and first values.
sounds %>% glimpse()
## Rows: 33
## Columns: 7
## $ age             <dbl> 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12,…
## $ sound           <chr> "cockadoodledoo", "meow", "woof woof", "cockadoodledo…
## $ kids_produce    <dbl> 1, 0, 3, 0, 2, 2, 0, 5, 4, 0, 5, 12, 0, 12, 28, 9, 12…
## $ kids_understand <dbl> 3, 10, 12, 2, 21, 22, 9, 41, 40, 4, 36, 32, 16, 59, 5…
## $ kids_respond    <dbl> 35, 35, 35, 91, 93, 93, 139, 145, 143, 94, 94, 94, 14…
## $ prop_produce    <dbl> 0.02857143, 0.00000000, 0.08571429, 0.00000000, 0.021…
## $ prop_understand <dbl> 0.08571429, 0.28571429, 0.34285714, 0.02197802, 0.225…
# Table of the distinct sound labels present in the data.
knitr::kable(distinct(sounds, sound))
sound
cockadoodledoo
meow
woof woof
# Vector of the distinct ages; pull() with no column argument extracts
# the last (here, the only) column.
pull(distinct(sounds, age))
##  [1]  8  9 10 11 12 13 14 15 16 17 18

How many variables?

# Column (variable) names of the data set.
colnames(sounds)
## [1] "age"             "sound"           "kids_produce"    "kids_understand"
## [5] "kids_respond"    "prop_produce"    "prop_understand"

How many types of animal sounds, and what are they?

# Number of children producing each sound, summed over all ages.
knitr::kable(
  summarize(
    group_by(sounds, sound),
    total_produce = sum(kids_produce)
  )
)
sound total_produce
cockadoodledoo 148
meow 681
woof woof 940
## # Initial EDA Plots

How many kids produce each kind of sound?

Bar Plot

# One bar per sound; geom_col() stacks the per-age rows, so each bar's
# height is the total across ages.
ggplot(data = sounds, mapping = aes(x = sound, y = kids_produce)) +
  labs(x = "Sound", y = "Total Children Producing") +
  geom_col()

# Proportion of children producing at each age, one panel per sound.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  facet_wrap(~sound) +
  labs(x = "Age (mos)", y = "Proportion of Children Producing") +
  geom_col()

## Scatter Plot

# Same faceted view as the bar chart, drawn with points instead of bars.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  facet_wrap(~sound) +
  labs(x = "Age (mos)", y = "Proportion of Children Producing") +
  geom_point()

## Discrete Plot

# All sounds in one panel, with no visual distinction between them yet.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  labs(x = "Age (months)", y = "Proportion of Children Producing") +
  geom_point(size = 2)

Which are continuous

Which are categorical and ordinal?

How many total kids?

# NOTE(review): count() on an ungrouped tibble returns the number of rows
# (age-by-sound combinations), not the number of children surveyed.
sounds%>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1    33

How many different ages?

How many kids per age?

Remember: Make sure to adjust the labels!

Default discrete palette (Challenge #2)

# Map sound to point colour; ggplot2 picks its default discrete palette.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  labs(x = "Age (months)", y = "Proportion of Children Producing") +
  geom_point(mapping = aes(color = sound), size = 2)

Adding lines (Challenge #3)

# First attempt: no grouping is given to geom_line(), so all rows are
# joined into a single path.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  labs(x = "Age (months)", y = "Proportion of Children Producing") +
  geom_line() +
  geom_point(mapping = aes(color = sound), size = 2)

# A possible solution: group the line by sound so each sound has its own path.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  labs(x = "Age (months)", y = "Proportion of Children Producing") +
  geom_line(mapping = aes(group = sound)) +
  geom_point(mapping = aes(color = sound), size = 2)

Challenge #4

Coloring both lines and points

# Does this look right? Yes: putting color in the top-level aes() makes
# both the line and point layers inherit it.
ggplot(
  data = sounds,
  mapping = aes(x = age, y = prop_produce, color = sound)
) +
  labs(x = "Age (months)", y = "Proportion of Children Producing") +
  geom_line() +
  geom_point(size = 2)

Using geom_smooth()

# Replace the raw lines with a thin smoothed trend per sound (no ribbon).
ggplot(
  data = sounds,
  mapping = aes(x = age, y = prop_produce, color = sound)
) +
  labs(x = "Age (months)", y = "Proportion of Children Producing") +
  geom_smooth(se = FALSE, lwd = .5) +
  geom_point(size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Controlling factor order

library(forcats)
# Store sound as a factor so its levels can be reordered with forcats.
sounds <- mutate(sounds, sound = as.factor(sound))

# Base trajectory plot reused throughout the rest of the document.
# fct_reorder2() reorders the sound levels using age and prop_produce so
# the legend order follows the curves; the legend title is reset to "sound".
sound_traj <- ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    color = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    color = "sound"
  ) +
  geom_smooth(se = FALSE, lwd = .5) +
  geom_point(size = 2)
sound_traj
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Modifying default colors

Experiment with each property in scale_color_hue() to get a sense of what it does.

# Explicit hue scale with all arguments left at their defaults
sound_traj + scale_color_hue()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Restrict the hue range to 0-90 (l and c are spelled out at their defaults)
sound_traj + scale_color_hue(h = c(0, 90), c = 100, l = 65)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Darker colours: luminance 45 instead of the default 65
sound_traj + scale_color_hue(l = 45)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Paler colours: chroma reduced from 100 to 50, luminance raised to 75
sound_traj + scale_color_hue(c = 50, l = 75)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Setting discrete colors

# Hand-picked named R colours, one per sound level.
sound_traj +
  scale_color_manual(
    values = c("cornflowerblue", "seagreen", "coral")
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Experiment with scale_color_manual() and some of the various named colors that come built-in to R!

Challenge #5

Why doesn’t the code block change the colors?

# Version 1: sound is mapped to `color`, but the manual palette is attached
# to the `fill` scale, so the colours drawn do not change.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    color = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    color = "sound"
  ) +
  scale_fill_manual(values = c("cornflowerblue", "seagreen", "coral")) +
  geom_smooth(se = FALSE, lwd = .5) +
  geom_point(size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Version 2: sound is mapped to `fill` instead, with the default point shape.
# NOTE(review): the default point shape and the smooth line presumably have
# no fill to show, so the palette would still not be visible; confirm in the
# rendered plot.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  scale_fill_manual(values = c("cornflowerblue", "seagreen", "coral")) +
  geom_smooth(se = FALSE, lwd = .5) +
  geom_point(size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Challenge #6

# Re-display the stored base trajectory plot for comparison.
sound_traj
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Shape 21 is given to the points and sound is mapped to fill; the smooth
# lines get their own colour mapping and are kept out of the legend.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  geom_smooth(
    mapping = aes(color = fct_reorder2(sound, age, prop_produce)),
    se = FALSE,
    lwd = .5,
    show.legend = FALSE
  ) +
  geom_point(shape = 21, size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same plot with one manual palette applied to both the fill and colour scales.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  scale_color_manual(values = c("cornflowerblue", "seagreen", "coral")) +
  scale_fill_manual(values = c("cornflowerblue", "seagreen", "coral")) +
  geom_smooth(
    mapping = aes(color = fct_reorder2(sound, age, prop_produce)),
    se = FALSE,
    lwd = .5,
    show.legend = FALSE
  ) +
  geom_point(shape = 21, size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour names are strings, so they are quoted when building the vector...
my_colors <- c("cadetblue", "steelblue", "salmon")
# ...but the vector itself is an object and is passed unquoted.
sound_traj + scale_color_manual(values = my_colors)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Challenge #7

# Six-colour colour-blind-friendly palette, taken from
# https://github.com/mwaskom/seaborn/blob/master/seaborn/palettes.py
sb_colorblind <- c(
  "#0072B2", "#009E73", "#D55E00",
  "#CC79A7", "#F0E442", "#56B4E9"
)
# The vector holds more colours than there are sounds.
sound_traj + scale_colour_manual(values = sb_colorblind)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Built-in discrete color palettes

Using scale_color_brewer()

library(RColorBrewer)
brewer.pal(n = 5, name = "Dark2") # hex codes of 5 colours from Dark2
## [1] "#1B9E77" "#D95F02" "#7570B3" "#E7298A" "#66A61E"
display.brewer.pal(n = 5, name = "Dark2") # draw those 5 colours

# Apply the Dark2 palette to the trajectory plot.
sound_traj + scale_color_brewer(palette = "Dark2")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Using wesanderson

library(wesanderson)
wes_palettes %>% names() # names of every palette shipped with wesanderson
##  [1] "BottleRocket1"  "BottleRocket2"  "Rushmore1"      "Rushmore"      
##  [5] "Royal1"         "Royal2"         "Zissou1"        "Darjeeling1"   
##  [9] "Darjeeling2"    "Chevalier1"     "FantasticFox1"  "Moonrise1"     
## [13] "Moonrise2"      "Moonrise3"      "Cavalcanti1"    "GrandBudapest1"
## [17] "GrandBudapest2" "IsleofDogs1"    "IsleofDogs2"
wes_palette(name = "GrandBudapest2") # view the named palette

wes_palette(name = "GrandBudapest2")[1:4] # hex codes of the first 4 colours
## [1] "#E6A0C4" "#C6CDF7" "#D8A499" "#7294D4"
wes_palette(name = "GrandBudapest2")[c(1, 4)] # hex codes of colours 1 and 4
## [1] "#E6A0C4" "#7294D4"
# Use a whole wesanderson palette as the manual colour values.
sound_traj + scale_color_manual(values = wes_palette(name = "Darjeeling1"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same approach with the FantasticFox1 palette.
sound_traj + scale_color_manual(values = wes_palette(name = "FantasticFox1"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Challenge #8

# Pick a consecutive run of colours (3 through 5) from Darjeeling1.
sound_traj +
  scale_color_manual(values = wes_palette(name = "Darjeeling1")[3:5])
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Pick non-adjacent colours (2, 3 and 5) from FantasticFox1.
sound_traj +
  scale_color_manual(values = wes_palette(name = "FantasticFox1")[c(2, 3, 5)])
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Using ggthemes

library(ggthemes)
# Colour scale from ggthemes: fivethirtyeight
sound_traj + scale_color_fivethirtyeight()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Colour scale from ggthemes: economist
sound_traj + scale_color_economist()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Using beyonce

library(beyonce)
# Preview palette number 18 from the beyonce package.
beyonce_palette(18)

# Use colours 3 through 5 of that palette.
sound_traj + scale_color_manual(values = beyonce_palette(18)[3:5])
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Use colours 1, 4 and 5 instead.
sound_traj + scale_color_manual(values = beyonce_palette(18)[c(1, 4, 5)])
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Using viridis

# viridis with discrete = TRUE, since sound is a discrete variable.
sound_traj +
  theme_minimal() +
  scale_color_viridis(discrete = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same, using the "plasma" option.
sound_traj +
  theme_minimal() +
  scale_color_viridis(option = "plasma", discrete = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Challenge #9

# Points use shape 21 with sound mapped to fill and a constant midnightblue
# colour; viridis is applied to both the fill and colour scales.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  theme_minimal() +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  scale_color_viridis(discrete = TRUE) +
  scale_fill_viridis(discrete = TRUE) +
  geom_smooth(
    mapping = aes(color = fct_reorder2(sound, age, prop_produce)),
    se = FALSE,
    lwd = .5,
    show.legend = FALSE
  ) +
  geom_point(shape = 21, size = 2, colour = "midnightblue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Greyscale

Experimenting with scale_color_grey()/scale_fill_grey()

# Grey colour scale with default arguments, on a minimal theme.
sound_traj +
  theme_minimal() +
  scale_color_grey()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Explicit grey range: start = 0.2, end = 0.8.
sound_traj + scale_color_grey(start = 0.2, end = .8)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Variant 1: grey fill for the points and matching grey line colours,
# both running from 0.3 to 1.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  scale_color_grey(start = 0.3, end = 1) +
  scale_fill_grey(start = 0.3, end = 1) +
  geom_smooth(
    mapping = aes(color = fct_reorder2(sound, age, prop_produce)),
    se = FALSE,
    lwd = .5,
    show.legend = FALSE
  ) +
  geom_point(shape = 21, size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Variant 2: all lines black, distinguished by line type instead of colour.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  scale_fill_grey(start = 0.3, end = 1) +
  geom_smooth(
    mapping = aes(linetype = fct_reorder2(sound, age, prop_produce)),
    color = "black",
    se = FALSE,
    lwd = .5,
    show.legend = FALSE
  ) +
  geom_point(shape = 21, size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Variant 3: lines vary in both grey level and line type; the grey range
# is narrowed to 0.3-0.8.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing",
    fill = "sound"
  ) +
  scale_color_grey(start = 0.3, end = .8) +
  scale_fill_grey(start = 0.3, end = .8) +
  geom_smooth(
    mapping = aes(
      color = fct_reorder2(sound, age, prop_produce),
      linetype = fct_reorder2(sound, age, prop_produce)
    ),
    se = FALSE,
    lwd = .5,
    show.legend = FALSE
  ) +
  geom_point(shape = 21, size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Using colorblindr

# Keep the beyonce-coloured plot in a variable so it can be handed to
# cvd_grid(), which draws the plot under simulated colour-vision deficiencies.
my_sound_traj <- sound_traj + scale_color_manual(
  values = beyonce_palette(18)[c(1, 4, 5)]
)
library(colorblindr)
my_sound_traj %>% cvd_grid()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Trajectory plot using colorblindr's Okabe-Ito colour scale.
cb_sound_traj <- sound_traj + scale_color_OkabeIto()

cb_sound_traj
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Check that plot under the colour-vision-deficiency simulations.
cb_sound_traj %>% cvd_grid()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Eight-colour palette stored as hex codes.
cbbPalette <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Use entries 3, 7 and 8 of the palette for the line and point colours.
sound_traj + scale_colour_manual(values = cbbPalette[c(3, 7, 8)])
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Repel labels

library(ggrepel)

# Give a label only to the rows at the largest age, so each curve gets one
# label at its right-hand end; every other row is NA.
sounds <- sounds %>%
  mutate(label = case_when(
    age == max(age) ~ sound))

# Direct labelling replaces the legend: labels are nudged right of the last
# point and repelled only vertically so they stay aligned with the curve ends.
ggplot(sounds, aes(x = age, 
                   y = prop_produce, 
                   color = fct_reorder2(sound, age, prop_produce))) +
  geom_smooth(se = FALSE, lwd = .5) +
  geom_point(size = 2) +
  labs(x = "Age (months)", 
       y = "Proportion of Children Producing") +
  geom_text_repel(aes(label = label),
                  nudge_x = 1,
                  direction = "y",
                  na.rm = TRUE) +
  # "none" rather than FALSE: passing FALSE to guides() is deprecated in
  # newer ggplot2 releases, while "none" works in old and new versions.
  guides(color = "none")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Continuous Colors

Experiment a bit with scale_color_gradient()/scale_fill_gradient()!

# Base plot for the continuous-colour examples: colour is mapped to the
# numeric age, and the lines are grouped by sound.
sound_by_age <- ggplot(
  data = sounds,
  mapping = aes(x = age, y = prop_produce, color = age)
) +
  labs(
    x = "Age (months)",
    y = "Proportion of Children Producing"
  ) +
  geom_line(mapping = aes(group = sound), lwd = .5) +
  geom_point(size = 2)
sound_by_age

# Two-colour gradient with default arguments
sound_by_age + scale_color_gradient()

# Same gradient with the scale reversed
sound_by_age + scale_color_gradient(trans = "reverse")

# Custom end points: white to red
sound_by_age + scale_color_gradient(high = "red", low = "white")

# Custom end points: light grey to black
sound_by_age + scale_color_gradient(high = "black", low = "grey90")

# Diverging colour scheme centred on the median age
med_age <- median(pull(sounds, age))
sound_by_age +
  scale_color_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = med_age
  )

Built-in continuous palettes

# Multi-colour gradient built from five PuBuGn colours.
sound_by_age +
  scale_color_gradientn(
    colours = brewer.pal(n = 5, name = "PuBuGn")
  )

# Same colours in reverse order.
sound_by_age +
  scale_color_gradientn(
    colours = brewer.pal(n = 5, name = "PuBuGn") %>% rev()
  )

Experiment a bit with RColorBrewer and viridis

# viridis with default arguments on the continuous colour scale
sound_by_age + scale_color_viridis()

# "magma" option
sound_by_age + scale_color_viridis(option = "magma")

# "inferno" option with begin = 1 and end = 0, i.e. the palette run backwards
sound_by_age +
  scale_color_viridis(begin = 1, end = 0, option = "inferno")

Challenge #10

Loading dataset

# The file has no header row: with the default col_names = TRUE the first
# record (39, State-gov, 77516, ...) is consumed as column names and that
# observation is lost. Read every line as data; the columns are named in
# the data-wrangling step that follows.
salary <- read_csv("adult-training.csv", col_names = FALSE)
## Parsed with column specification:
## cols(
##   `39` = col_double(),
##   `State-gov` = col_character(),
##   `77516` = col_double(),
##   Bachelors = col_character(),
##   `13` = col_double(),
##   `Never-married` = col_character(),
##   `Adm-clerical` = col_character(),
##   `Not-in-family` = col_character(),
##   White = col_character(),
##   Male = col_character(),
##   `2174` = col_double(),
##   `0` = col_double(),
##   `40` = col_double(),
##   `United-States` = col_character(),
##   `<=50K` = col_character()
## )

Data wrangling

# Give the 15 columns descriptive names, in file order.
colnames(salary) <- c(
  "Age", "workclass", "fnlwgt", "education", "education_num",
  "marital_status", "occupation", "relationship", "race", "gender",
  "capital_gain", "capital_loss", "hours_per_week", "native_country",
  "income_bracket"
)
salary
## # A tibble: 32,560 x 15
##      Age workclass fnlwgt education education_num marital_status occupation
##    <dbl> <chr>      <dbl> <chr>             <dbl> <chr>          <chr>     
##  1    50 Self-emp…  83311 Bachelors            13 Married-civ-s… Exec-mana…
##  2    38 Private   215646 HS-grad               9 Divorced       Handlers-…
##  3    53 Private   234721 11th                  7 Married-civ-s… Handlers-…
##  4    28 Private   338409 Bachelors            13 Married-civ-s… Prof-spec…
##  5    37 Private   284582 Masters              14 Married-civ-s… Exec-mana…
##  6    49 Private   160187 9th                   5 Married-spous… Other-ser…
##  7    52 Self-emp… 209642 HS-grad               9 Married-civ-s… Exec-mana…
##  8    31 Private    45781 Masters              14 Never-married  Prof-spec…
##  9    42 Private   159449 Bachelors            13 Married-civ-s… Exec-mana…
## 10    37 Private   280464 Some-col…            10 Married-civ-s… Exec-mana…
## # … with 32,550 more rows, and 8 more variables: relationship <chr>,
## #   race <chr>, gender <chr>, capital_gain <dbl>, capital_loss <dbl>,
## #   hours_per_week <dbl>, native_country <chr>, income_bracket <chr>
# Total capital gain per race, shown as a formatted table.
knitr::kable(
  summarize(
    group_by(salary, race),
    total_capital_gain = sum(capital_gain)
  )
)
race total_capital_gain
Amer-Indian-Eskimo 194458
Asian-Pac-Islander 1536014
Black 1905454
Other 253293
White 31197931
# Keep records with capital gain below 25000 and attach, to every row, the
# mean capital gain of its education_num x gender group (mutate() keeps one
# row per record rather than one per group).
salary_untidy <- salary %>%
  filter(capital_gain < 25000) %>%
  select(Age, education_num, capital_gain, gender) %>%
  group_by(education_num, gender) %>%
  mutate(Average = mean(capital_gain)) %>%
  # Drop the grouping once the group means are computed so that later
  # verbs on salary_untidy are not silently applied per group.
  ungroup()
salary_untidy
## # A tibble: 32,345 x 5
## # Groups:   education_num, gender [32]
##      Age education_num capital_gain gender Average
##    <dbl>         <dbl>        <dbl> <chr>    <dbl>
##  1    50            13            0 Male    1116. 
##  2    38             9            0 Male     395. 
##  3    53             7            0 Male     266. 
##  4    28            13            0 Female   496. 
##  5    37            14            0 Female  1130. 
##  6    49             5            0 Female    87.1
##  7    52             9            0 Male     395. 
##  8    31            14        14084 Female  1130. 
##  9    42            13         5178 Male    1116. 
## 10    37            10            0 Male     512. 
## # … with 32,335 more rows
# Base bar chart: average capital gain by education index, with the bars
# for each gender placed side by side.
salary_untidy_plot <- ggplot(
  data = salary_untidy,
  mapping = aes(x = education_num, y = Average)
) +
  geom_col(
    mapping = aes(fill = gender),
    position = "dodge",
    width = .6,
    na.rm = FALSE
  )

Version with good color

# Polished version: manual orange/blue fill with a "Gender" legend title,
# underlined centred title, axes starting at zero with fixed breaks, blank
# panel background and black axis lines.
salary_untidy_plot1 <- salary_untidy_plot +
  ggtitle(~""*underline("US Adult Census data")) +
  labs(x = "Education Index", y = "Average Capital gain") +
  scale_fill_manual(name = "Gender", values = c("#E69F00", "#0072B2")) +
  scale_x_continuous(breaks = seq(0, 16, 2), expand = c(0, 0)) +
  scale_y_continuous(breaks = seq(0, 2100, 300), expand = c(0, 0)) +
  theme(
    plot.title = element_text(hjust = 0.5),
    # size 10 is the setting that takes effect for the axis text
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
salary_untidy_plot1

# The bars are coloured through the `fill` aesthetic, so a colour scale has
# no effect on them; the Okabe-Ito palette must be applied to the fill scale.
# This replaces the manual fill scale (ggplot2 prints a message saying so),
# so the "Gender" legend title is passed again to keep it.
salary_untidy_plot_colorblind <- salary_untidy_plot1 +
  scale_fill_OkabeIto(name = "Gender")
salary_untidy_plot_colorblind

# Check the manually coloured plot with colorblindr's cvd_grid().
cvd_grid(salary_untidy_plot1)

This dataset is based on US adult income data; the source is https://www.kaggle.com/uciml/adult-census-income. As part of my data wrangling, I selected the required columns and aggregated capital gain by education level. I also tried other geoms, such as scatter (point) and line plots. Because the dataset has a large number of rows, I could not see any clear pattern with those geoms, so I chose geom_col(), which also looks better aesthetically for this dataset.

To improve my data representation, I used geom_col() to plot the processed data, removed background elements, changed the font size of the axis text, underlined the plot title, highlighted the axis lines, changed the legend title, put the aesthetics in a logical sequence, and so on.

I took capital gain by education level and analysed how the average capital gain varies for males and females at each education level. The education index ranges from 1 to 16, where 1 is the lowest and 16 is the highest. My plot shows the trend in capital gain for males and females with different education levels. As the education level increases, the average capital gain increases for both males and females. However, at the same education level, males have a higher capital gain than females.

Initially I chose purple and magenta to represent Male and Female respectively, but when I ran the color-vision-deficiency simulation using “cvd_grid”, the “Desaturated” plot did not distinguish the genders: the shades for Male and Female looked quite similar. I then changed the fill colors of my plot and validated again; now the distinction between male and female data is visible in all four simulated views.

Version with greyscale

# Greyscale version: the grey fill scale replaces the manual one already
# on the plot.
salary_untidy_plot1 +
  theme(panel.background = element_blank()) +
  scale_fill_grey()
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

Version with dreadful color

# Deliberately poor design: yellow and bright-green fills on top of red and
# blue dotted grid lines, with no title.
salary_untidy_plot2 <- ggplot(
  data = salary_untidy,
  mapping = aes(x = education_num, y = Average)
) +
  labs(x = "Education Index", y = "Average Capital gain") +
  scale_fill_manual(values = c("#FFFF00", "#66FF00")) +
  theme(
    panel.grid.minor = element_line(colour = "blue", linetype = "dotted"),
    panel.grid.major = element_line(colour = "red", linetype = "dotted")
  ) +
  geom_col(
    mapping = aes(fill = gender),
    position = "dodge",
    width = .6,
    na.rm = FALSE
  )
salary_untidy_plot2

# Run the same cvd_grid() check on the badly coloured plot.
cvd_grid(salary_untidy_plot2)

The above plot is very confusing: the red and blue background grid lines make the visualisation hard to read. There is no title, which makes the plot less informative. The empty space around the bars also makes for poor visualization. In addition, the choice of yellow and green to distinguish gender is not good. These colors also failed the colorblindness simulation: as we can see, the desaturated plot fails to show the male-female distinction, and the deuteranomaly and protanomaly plots are not clear.